###############################
# Analysis of conjoint data 
###############################

rm(list=ls())

# load packages 
library(foreign)
library(lmtest)
library(sandwich)
library(stargazer)
library(msm)
library(Hmisc)

# load function that does clustered SEs
vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

##############################
# Prepare data
##############################

# load data
d = read.dta13("~/Dropbox/Cerrillos 2017/08_replication/conjoint_cerrillos_LAPS.dta")
table(d$failcon)
names(d)
dim(d)

# check data
d$idnum = as.numeric(d$idnum)
table(d$enumerator)
table(d$selected, exclude = NULL)

# proportion missing outcomes 
(table(d$selected)[4] + table(d$selected)[5])/(table(d$selected)[1] + table(d$selected)[2] + table(d$selected)[3] + table(d$selected)[4] + table(d$selected)[5])

# drop failcon and non-responses
d = subset(d, d$selected!=99)
d = subset(d, d$selected!=88)
table(d$selected, exclude = NULL)

# prepare samples
table(d$a5, exclude = NULL)

d_left = d[d$a5<5,]
table(d_left$a5)

d_right = d[d$a5>6 & d$a5<11,]
table(d_right$a5)

d_center = d[d$a5>4 & d$a5<7,]
table(d_center$a5)

d_noideo = d[d$a5==88 | d$a5==99,]
table(d_noideo$a5)

###################
# Main results
###################

# All
describe(d$idnum)
all <- lm(d$outcome ~ d$atideology + d$atprofession + d$atage)
all_c <- round(coeftest(all, vcov = vcovCluster(all, cluster = d$idnum)),4)
all_c

# Left
describe(d_left$idnum)
left <- lm(d_left$outcome ~ d_left$atideology + d_left$atprofession + d_left$atage)
summary(left)
left_c <- round(coeftest(left, vcov = vcovCluster(left, cluster = d_left$idnum)),4)
left_c

# Right
describe(d_right$idnum)
right <- lm(d_right$outcome ~ d_right$atideology + d_right$atprofession + d_right$atage)
summary(right)
right_c <- round(coeftest(right, vcov = vcovCluster(right, cluster = d_right$idnum)),4)
right_c

# Center
describe(d_center$idnum)
center <- lm(d_center$outcome ~ d_center$atideology + d_center$atprofession + d_center$atage)
summary(center)
center_c <- round(coeftest(center, vcov = vcovCluster(center, cluster = d_center$idnum)),4)
center_c

# No ideology
describe(d_noideo$idnum)
noideo <- lm(d_noideo$outcome ~ d_noideo$atideology + d_noideo$atprofession + d_noideo$atage)
summary(noideo)
noideo_c <- round(coeftest(noideo, vcov = vcovCluster(noideo, cluster = d_noideo$idnum)),4)
noideo_c

#####################
# Prepare plots
#####################

# left 
left_c # regression results
coeff = left_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = left_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/040_left.txt", sep="\t")

# right
right_c # regression results
coeff = right_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = right_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/041_right.txt", sep="\t")

# center
center_c # regression results
coeff = center_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = center_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/042_center.txt", sep="\t")

# no ideology
noideo_c # regression results
coeff = noideo_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = noideo_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ],baseline,results[4:5, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession",
                      "1b.atage","2.atage","3.atage")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/043_noideo.txt", sep="\t")
